/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Licensed under the GNU GPLv2 (Linux kernel license)
  Copyright 2012-2014
*/
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/time.h>

#include <alga/rng_mng.h>
#include <alga/timing.h>
#include <uapi/alga/pixel_fmts.h>
#include <uapi/alga/amd/dce6/dce6.h>

#include "mc.h"
#include "rlc.h"
#include "ih.h"
#include "fence.h"
#include "ring.h"
#include "dmas.h"
#include "ba.h"
#include "cps.h"
#include "gpu.h"
#include "drv.h"

#include "regs.h"

/* Turn on interrupt delivery: enable the IH block and its ring buffer. */
void ih_ena(struct pci_dev *dev)
{
	u32 ctl;
	u32 rb_ctl;

	/* read-modify-write: set the enable bit in each control register */
	ctl = rr32(dev, IH_CTL) | IC_ENA_INTR;
	rb_ctl = rr32(dev, IH_RB_CTL) | IRC_IH_RB_ENA;

	wr32(dev, ctl, IH_CTL);
	wr32(dev, rb_ctl, IH_RB_CTL);
}

/* Turn off interrupt delivery: disable the ring buffer and the IH block. */
void ih_dis(struct pci_dev *dev)
{
	u32 rb_ctl;
	u32 ctl;

	/* read-modify-write: clear the enable bit in each control register */
	rb_ctl = rr32(dev, IH_RB_CTL) & ~IRC_IH_RB_ENA;
	ctl = rr32(dev, IH_CTL) & ~IC_ENA_INTR;

	/* works even if ucode is not loaded */
	wr32(dev, rb_ctl, IH_RB_CTL);
	wr32(dev, ctl, IH_CTL);
}

/* Zero both hardware ring pointers so the IH ring restarts empty. */
void ih_reset(struct pci_dev *dev)
{
	wr32(dev, 0, IH_RB_RPTR);
	wr32(dev, 0, IH_RB_WPTR);
}

/* ih ring size is 2^IH_RING_LOG2_DWS(=14) dwords or 4096 vectors of 16 bytes */
/*
 * Program the interrupt handler (IH) ring: interrupt control, ring base,
 * write pointer writeback address, and default IH control settings, then
 * zero the ring pointers and the driver-side read pointer/lock.
 * Interrupt delivery stays disabled until ih_ena() is called.
 */
void ih_init(struct pci_dev *dev)
{
	struct dev_drv_data *dd;
	u32 intr_ctl_0;
	u32 ih_rb_ctl;
	u32 ih_ctl;
	u64 wb_ih_wptr_gpu_addr;

	dd = pci_get_drvdata(dev);

	/*
	 * setup interrupt control
	 * set dummy read gpu address to ring gpu address
	 * 256 bytes block index
	 */
	wr32(dev, dd->ba.ih_ring_map->gpu_addr >> 8, INTR_CTL_1);

	intr_ctl_0 = rr32(dev, INTR_CTL_0);
	/*
	 * IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled
	 *                          without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_ENA
	 */
	intr_ctl_0 &= ~IC_IH_DUMMY_RD_OVERRIDE;

	/* IH_REQ_NONSNOOP_ENA=1 if ring is in non-cacheable mem, e.g. vram */
	intr_ctl_0 &= ~IC_IH_REQ_NONSNOOP_ENA; /* we are in bus aperture */
	wr32(dev, intr_ctl_0, INTR_CTL_0);

	/* ring base, 256 bytes block index */
	wr32(dev, dd->ba.ih_ring_map->gpu_addr >> 8, IH_RB_BASE);

	ih_rb_ctl = (IRC_IH_WPTR_OVERFLOW_ENA | IRC_IH_WPTR_OVERFLOW_CLR
				| set(IRC_IH_IB_LOG2_DWS, IH_RING_LOG2_DWS)
					| IRC_IH_WPTR_WRITEBACK_ENA);

	/* writeback slot for the hw write pointer, must be dw aligned */
	wb_ih_wptr_gpu_addr = dd->ba.wb_map->gpu_addr + WB_IH_WPTR_OF;
	wr32(dev, lower_32_bits(wb_ih_wptr_gpu_addr), IH_RB_WPTR_ADDR_LO);
	wr32(dev, upper_32_bits(wb_ih_wptr_gpu_addr), IH_RB_WPTR_ADDR_HI);
	wr32(dev, ih_rb_ctl, IH_RB_CTL);

	ih_reset(dev);

	/* default settings for IH_CTL (disabled at first) */
	ih_ctl = set(IC_MC_WR_REQ_CREDIT, 0x10) | set(IC_MC_WR_CLEAN_CNT, 0x10)
					| set(IC_MC_VM_ID, 0) | IC_RPTR_REARM;
	wr32(dev, ih_ctl, IH_CTL);

	/* driver-side read pointer and the lock serializing ih_parse() */
	dd->ih.rp = 0;
	spin_lock_init(&dd->ih.lock);
}

#define VECTOR_SZ 16
#define VECTOR_ID_D0	1
#define VECTOR_ID_D1	2
#define VECTOR_ID_D2	3
#define VECTOR_ID_D3	4
#define VECTOR_ID_D4	5
#define VECTOR_ID_D5	6
#define		Dx_VBLANK 0
#define VECTOR_ID_HPD	42
#define VECTOR_ID_EOP	181
#define VECTOR_ID_DMA_0	224
#define VECTOR_ID_DMA_1	244

/*
 * Each ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/*
 * Dispatch one decoded IH ring vector.
 * id/data/ring_id are the fields extracted from the 128 bit ring entry
 * (layout above). *irq_thd is set to IRQ_THD_ENA when the threaded part
 * of the irq handler must run (hpd processing). *dce6_irqs_acked makes
 * sure dce6_irqs_ack() is called at most once per batch of vectors.
 */
static void vector(struct pci_dev *dev, u32 id, u32 data, u8 ring_id,
					u8 *irq_thd, u8 *dce6_irqs_acked)
{
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	switch (id) {
	case VECTOR_ID_HPD:
		if (!*dce6_irqs_acked) {
			dce6_irqs_ack(dd->dce);
			*dce6_irqs_acked = 1;
		}
		dce6_hpd_irq(dd->dce, data);
		*irq_thd = IRQ_THD_ENA;
		break;
	case VECTOR_ID_D0:
	case VECTOR_ID_D1:
	case VECTOR_ID_D2:
	case VECTOR_ID_D3:
	case VECTOR_ID_D4:
	case VECTOR_ID_D5:
		if (!*dce6_irqs_acked) {
			dce6_irqs_ack(dd->dce);
			*dce6_irqs_acked = 1;
		}
		if (data == Dx_VBLANK) {/* only page flipping in vblank */
			struct timespec tp;
			getrawmonotonic(&tp);

			/* VECTOR_ID_Dx starts at 1, display index at 0 */
			dce6_pf_irq(dd->dce, id - 1, tp);
		}
		break;
	case VECTOR_ID_EOP:
		/* publish the fence value written back by the gpu, then
		   wake any waiters on that ring's fence */
		switch (ring_id) {
		case 0:
			atomic_set(&dd->gpu_3d.fence.bottom,
				le32_to_cpup(dd->gpu_3d.fence.cpu_addr));

			wake_up(&dd->gpu_3d.fence.wait_queue);
			break;
		case 1:
			atomic_set(&dd->gpu_c0.fence.bottom,
				le32_to_cpup(dd->gpu_c0.fence.cpu_addr));

			wake_up(&dd->gpu_c0.fence.wait_queue);
			break;
		case 2:
			atomic_set(&dd->gpu_c1.fence.bottom,
				le32_to_cpup(dd->gpu_c1.fence.cpu_addr));

			wake_up(&dd->gpu_c1.fence.wait_queue);
			break;
		default:
			break;
		}
		break;
	case VECTOR_ID_DMA_0:
		atomic_set(&dd->dmas[0].fence.bottom,
				le32_to_cpup(dd->dmas[0].fence.cpu_addr));

		wake_up(&dd->dmas[0].fence.wait_queue);
		break;
	case VECTOR_ID_DMA_1:
		atomic_set(&dd->dmas[1].fence.bottom,
				le32_to_cpup(dd->dmas[1].fence.cpu_addr));
		wake_up(&dd->dmas[1].fence.wait_queue);
		break;
	default:
		/* unknown source id: silently ignored, as before */
		break;
	}
}

/*
 * Handle a write pointer with the overflow flag set: log it, clear the
 * hardware overflow condition and skip to the oldest vector that was not
 * overwritten. Returns the write pointer with the overflow flag cleared.
 */
static u64 wp_overflow(struct pci_dev *dev, u32 wp)
{
	struct dev_drv_data *dd;
	u32 tmp;

	dd = pci_get_drvdata(dev);

	if ((wp & IRW_RB_OVERFLOW) != 0) {
		/*
		 * When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		/* note the trailing space before "rp=": the two adjacent
		   literals concatenate into one format string */
		dev_warn(&dev->dev, "ih ring buffer overflow wp=0x%08x "
				"rp=0x%08x, trying next vector at 0x%08x\n",
				(u32)(wp & (~IRW_RB_OVERFLOW)), dd->ih.rp,
					(wp + VECTOR_SZ) & IH_RING_MASK);

		dd->ih.rp = (wp + VECTOR_SZ) & IH_RING_MASK;

		/* ack the overflow in the hw so it can be reported again */
		tmp = rr32(dev, IH_RB_CTL);
		tmp |= IRC_IH_WPTR_OVERFLOW_CLR;
		wr32(dev, tmp, IH_RB_CTL);

		wp &= ~IRW_RB_OVERFLOW;
	}
	return wp;
}

/*
 * Drain the IH ring: walk every vector written by the gpu since the last
 * call and dispatch it through vector(). Returns IRQ_THD_ENA when the
 * threaded part of the irq handler must run, IRQ_THD_DIS otherwise.
 * Serialized by dd->ih.lock (taken with irqs saved).
 */
u8 ih_parse(struct pci_dev *dev)
{
	struct dev_drv_data *dd;
	unsigned long flgs;
	u32 wp;
	u32 rp;
	u8 irq_thd;

	dd = pci_get_drvdata(dev);
	irq_thd = IRQ_THD_DIS;

	spin_lock_irqsave(&dd->ih.lock, flgs);
	while (1) {
		u8 dce6_irqs_acked;

		/* read barrier before sampling the write pointer writeback
		   slot — NOTE(review): presumably orders it against the
		   gpu's dma writes of the ring entries; confirm */
		rmb();

		/* hw write pointer, written back by the gpu (see ih_init) */
		wp = le32_to_cpup(dd->ba.wb_map->cpu_addr + WB_IH_WPTR_OF);
		wp = wp_overflow(dev, wp);
		rp = dd->ih.rp;

		/* ring empty: nothing (more) to parse */
		if (rp == wp)
			break;

		/* let vector() ack the dce irq status once per batch */
		dce6_irqs_acked = 0;
		do {
			u32 id;
			u32 data;
			u8 ring_id;

			/* decode the 128 bit entry (layout above):
			   id [7:0], data [59:32], ring_id [71:64] */
			id =  le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + rp)
									& 0xff;
			data = le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + rp
						+ sizeof(id)) & 0xfffffff;
			ring_id = le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + rp
					+ sizeof(id) + sizeof(data)) & 0xff;

			vector(dev, id, data, ring_id, &irq_thd,
							&dce6_irqs_acked);

			/* advance 16 bytes, wrapping at the ring size */
			rp += VECTOR_SZ;
			rp &= IH_RING_MASK;
		} while (rp != wp);
		dd->ih.rp = rp;
	}
	/* tell the hw how far we have consumed */
	wr32(dev, dd->ih.rp, IH_RB_RPTR);
	spin_unlock_irqrestore(&dd->ih.lock, flgs);
	return irq_thd;
}
